{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**计算WOE值和IV值**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-03-28T04:12:53.028691Z",
     "start_time": "2019-03-28T04:12:53.019691Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "def createDateset():\n",
    "    dataSet=[\n",
    "            [0, 1, 0],\n",
    "            [0, 0, 0],\n",
    "            [0, 1, 0],\n",
    "            [1, 0, 1],\n",
    "            [1, 0, 0],\n",
    "            [1, 1, 1],\n",
    "            [0, 1, 1],\n",
    "            [1, 1, 1],\n",
    "            [1, 0, 1],\n",
    "            [1, 0, 1]]\n",
    "    return dataSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-03-28T04:13:27.282691Z",
     "start_time": "2019-03-28T04:13:27.259691Z"
    }
   },
   "outputs": [],
   "source": [
    "def calcWOE(dataset,col,targe):\n",
    "    subdata=pd.DataFrame(dataset.groupby(col)[col].count())\n",
    "    suby=pd.DataFrame(dataset.groupby(col)[targe].sum())\n",
    "    data=pd.DataFrame(pd.merge(subdata,suby,how=\"left\",left_index=True,right_index=True))\n",
    "    b_total=data[targe].sum()\n",
    "    total=data[col].sum()\n",
    "    g_total=total-b_total\n",
    "    data[\"bad\"]=data.apply(lambda x:round(x[targe]/b_total,3),axis=1)\n",
    "    data[\"good\"]=data.apply(lambda x:round((x[col]-x[targe])/g_total,3),axis=1)\n",
    "    data[\"WOE\"]=data.apply(lambda x:np.log(x.bad/x.good),axis=1)\n",
    "    return data.loc[:,[\"bad\",\"good\",\"WOE\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-03-28T04:13:28.055691Z",
     "start_time": "2019-03-28T04:13:28.042691Z"
    }
   },
   "outputs": [],
   "source": [
    "def calcIV(dataset):\n",
    "    dataset[\"IV\"]=dataset.apply(lambda x:(x.bad-x.good)*x.WOE,axis=1)\n",
    "    IV=sum(dataset[\"IV\"])\n",
    "    return IV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-03-28T06:57:12.206305Z",
     "start_time": "2019-03-28T06:57:12.171305Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      bad  good       WOE\n",
      "x1                       \n",
      "0   0.167  0.75 -1.502079\n",
      "1   0.833  0.25  1.203573\n",
      "The IV of x1 is: 1.5773951850378087\n"
     ]
    }
   ],
   "source": [
    "if __name__ == '__main__':\n",
    "    data=createDateset()\n",
    "    data=pd.DataFrame(data,columns=[\"x1\",\"x2\",\"y\"])\n",
    "    data_WOE=calcWOE(data,\"x1\",\"y\")\n",
    "    print(data_WOE)\n",
    "    data_IV=calcIV(data_WOE)\n",
    "    print(\"The IV of %s is:\"%\"x1\",data_IV)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class bin_WOE_IV:\n",
    "    def __init__(self,eps=0.00001):"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 以spambase数据集为例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
